<!DOCTYPE html>
<html lang="zh-CN">
<head><meta name="generator" content="Hexo 3.8.0">
    
<!-- Google Analytics -->
<script>
// Command-queue stub: unless window.ga already exists, define it as a function
// that appends each call's arguments to ga.q. ga.l stores the current time as a
// number. Presumably analytics.js drains ga.q once it has loaded — not shown here.
window.ga=window.ga||function(){(ga.q=ga.q||[]).push(arguments)};ga.l=+new Date;
// Queue the 'create' command for property UA-83895646-2 with 'auto' as third argument.
ga('create', 'UA-83895646-2', 'auto');
// Queue a 'pageview' hit for the current page.
ga('send', 'pageview');
</script>
<script async src="https://www.google-analytics.com/analytics.js"></script>
<!-- End Google Analytics -->


    

    
<!-- Baidu Tongji -->
<script>var _hmt = _hmt || []</script>
<script async src="//hm.baidu.com/hm.js?c199e990e3c84b0b1047773457ba7222"></script>
<!-- End Baidu Tongji -->




    <meta charset="utf-8">
    
    <meta name="google-site-verification" content="google0706f42c5c9ccd5b.html">
    
    
    
    
    <title>解决GitHub Pages屏蔽百度爬虫的方法 | 逗哥的代码作坊 | 互联网是一门实践性科学</title>
    <!-- No maximum-scale: readers must be able to pinch-zoom the page. -->
    <meta name="viewport" content="width=device-width, initial-scale=1">
    
    <meta name="theme-color" content="#3F51B5">
    
    
    <meta name="keywords" content="github,seo">
    <meta name="description" content="Github屏蔽百度爬虫导致在Github Pages上托管的博客、网站都无法被百度索引到，但对于国内的小伙伴尤其是还在上学的未来的程序员们百度还是一个重要的检索渠道。已经有小伙伴在这方面做了尝试并且进行了全面的分析，这里我仅仅介绍一下个人的做法。这个博客也托管在Github Pages上，个人没有虚拟主机、域名也懒得备案，主要就是通过SAE的免费主机加智能DNS解决的。  新浪云开始征收每天10">
<meta property="og:type" content="article">
<meta property="og:title" content="解决GitHub Pages屏蔽百度爬虫的方法">
<meta property="og:url" content="http://blog.beanmr.com/2016-2-24-solve-github-baidu-spider-blocking/index.html">
<meta property="og:site_name" content="逗哥的代码作坊">
<meta property="og:description" content="Github屏蔽百度爬虫导致在Github Pages上托管的博客、网站都无法被百度索引到，但对于国内的小伙伴尤其是还在上学的未来的程序员们百度还是一个重要的检索渠道。已经有小伙伴在这方面做了尝试并且进行了全面的分析，这里我仅仅介绍一下个人的做法。这个博客也托管在Github Pages上，个人没有虚拟主机、域名也懒得备案，主要就是通过SAE的免费主机加智能DNS解决的。  新浪云开始征收每天10">
<meta property="og:locale" content="zh-CN">
<meta property="og:image" content="http://blog.beanmr.com/2016-2-24-solve-github-baidu-spider-blocking/1.png">
<meta property="og:image" content="http://blog.beanmr.com/2016-2-24-solve-github-baidu-spider-blocking/2.png">
<meta property="og:image" content="http://blog.beanmr.com/2016-2-24-solve-github-baidu-spider-blocking/3.png">
<meta property="og:image" content="http://blog.beanmr.com/2016-2-24-solve-github-baidu-spider-blocking/4.png">
<meta property="og:image" content="http://blog.beanmr.com/2016-2-24-solve-github-baidu-spider-blocking/5.png">
<meta property="og:updated_time" content="2019-03-19T03:49:07.004Z">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="解决GitHub Pages屏蔽百度爬虫的方法">
<meta name="twitter:description" content="Github屏蔽百度爬虫导致在Github Pages上托管的博客、网站都无法被百度索引到，但对于国内的小伙伴尤其是还在上学的未来的程序员们百度还是一个重要的检索渠道。已经有小伙伴在这方面做了尝试并且进行了全面的分析，这里我仅仅介绍一下个人的做法。这个博客也托管在Github Pages上，个人没有虚拟主机、域名也懒得备案，主要就是通过SAE的免费主机加智能DNS解决的。  新浪云开始征收每天10">
<meta name="twitter:image" content="http://blog.beanmr.com/2016-2-24-solve-github-baidu-spider-blocking/1.png">
    
        <link rel="alternate" type="application/atom+xml" title="逗哥的代码作坊" href="/atom.xml">
    
    <link rel="shortcut icon" href="/favicon.ico">
    <link rel="stylesheet" href="//unpkg.com/hexo-theme-material-indigo@latest/css/style.css">
    <script>window.lazyScripts=[]</script>

    <!-- custom head -->
    

</head>

<body>
    <div id="loading" class="active"></div>

    <aside id="menu" class="hide">
  <div class="inner flex-row-vertical">
    <!-- Icon-only control: aria-label supplies the accessible name, the glyph is hidden from assistive tech. -->
    <a href="javascript:;" class="header-icon waves-effect waves-circle waves-light" id="menu-off" aria-label="关闭菜单">
        <i class="icon icon-lg icon-close" aria-hidden="true"></i>
    </a>
    <div class="brand-wrap" style="background-image:url(/img/brand.jpg)">
      <div class="brand">
        <a href="/" class="avatar waves-effect waves-circle waves-light">
          <!-- alt gives the home link an accessible name; same text as the avatar in the copyright footer. -->
          <img src="/img/avatar.jpg" alt="FengDD">
        </a>
        <hgroup class="introduce">
          <h5 class="nickname">FengDD</h5>
          <a href="mailto:beanmr@gmail.com" title="beanmr@gmail.com" class="mail">beanmr@gmail.com</a>
        </hgroup>
      </div>
    </div>
    <div class="scroll-wrap flex-col">
      <ul class="nav">
        
            <li class="waves-block waves-effect">
              <a href="/">
                <i class="icon icon-lg icon-home"></i>
                主页
              </a>
            </li>
        
            <li class="waves-block waves-effect">
              <a href="/archives">
                <i class="icon icon-lg icon-archives"></i>
                Archives
              </a>
            </li>
        
            <li class="waves-block waves-effect">
              <a href="/tags">
                <i class="icon icon-lg icon-tags"></i>
                Tags
              </a>
            </li>
        
            <li class="waves-block waves-effect">
              <a href="/categories">
                <i class="icon icon-lg icon-th-list"></i>
                Categories
              </a>
            </li>
        
            <!-- External profiles open in a new tab; rel="noopener" keeps that tab from reaching this page through window.opener. -->
            <li class="waves-block waves-effect">
              <a href="https://github.com/BeanMr" target="_blank" rel="noopener">
                <i class="icon icon-lg icon-github"></i>
                Github
              </a>
            </li>
        
            <li class="waves-block waves-effect">
              <a href="http://www.weibo.com/beanmr" target="_blank" rel="noopener">
                <i class="icon icon-lg icon-weibo"></i>
                Weibo
              </a>
            </li>
        
      </ul>
    </div>
  </div>
</aside>

    <main id="main">
        <header class="top-header" id="header">
    <!-- Top bar: menu toggle, page title, search box and share button.
         The icon-only controls and the search field carry aria-label so they have an
         accessible name; the icon glyphs themselves are hidden from assistive tech. -->
    <div class="flex-row">
        <a href="javascript:;" class="header-icon waves-effect waves-circle waves-light on" id="menu-toggle" aria-label="打开菜单">
          <i class="icon icon-lg icon-navicon" aria-hidden="true"></i>
        </a>
        <div class="flex-col header-title ellipsis">解决GitHub Pages屏蔽百度爬虫的方法</div>
        
        <div class="search-wrap" id="search-wrap">
            <a href="javascript:;" class="header-icon waves-effect waves-circle waves-light" id="back" aria-label="返回">
                <i class="icon icon-lg icon-chevron-left" aria-hidden="true"></i>
            </a>
            <input type="text" id="key" class="search-input" autocomplete="off" placeholder="输入感兴趣的关键字" aria-label="搜索关键字">
            <a href="javascript:;" class="header-icon waves-effect waves-circle waves-light" id="search" aria-label="搜索">
                <i class="icon icon-lg icon-search" aria-hidden="true"></i>
            </a>
        </div>
        
        
        <a href="javascript:;" class="header-icon waves-effect waves-circle waves-light" id="menuShare" aria-label="分享">
            <i class="icon icon-lg icon-share-alt" aria-hidden="true"></i>
        </a>
        
    </div>
</header>
<header class="content-header post-header">

    <div class="container fade-scale">
        <h1 class="title">解决GitHub Pages屏蔽百度爬虫的方法</h1>
        <h5 class="subtitle">
            
                <time datetime="2019-03-19T03:49:07.001Z" itemprop="datePublished" class="page-time">
  2019-03-19
</time>


	<ul class="article-category-list"><li class="article-category-list-item"><a class="article-category-list-link" href="/categories/Jekyll/">Jekyll</a></li></ul>

            
        </h5>
    </div>

    


</header>


<div class="container body-wrap">
    
    <aside class="post-widget">
        <nav class="post-toc-wrap post-toc-shrink" id="post-toc">
            <h4>TOC</h4>
            <ol class="post-toc"><li class="post-toc-item post-toc-level-2"><a class="post-toc-link" href="#可行性及原理分析"><span class="post-toc-number">1.</span> <span class="post-toc-text">可行性及原理分析</span></a></li><li class="post-toc-item post-toc-level-2"><a class="post-toc-link" href="#操作手册"><span class="post-toc-number">2.</span> <span class="post-toc-text">操作手册</span></a></li></ol>
        </nav>
    </aside>


<article id="post-2016-2-24-solve-github-baidu-spider-blocking" class="post-article article-type-post fade" itemprop="blogPost">

    <div class="post-card">
        <h1 class="post-card-title">解决GitHub Pages屏蔽百度爬虫的方法</h1>
        <div class="post-meta">
            <time class="post-time" title="2019-03-19 11:49:07" datetime="2019-03-19T03:49:07.001Z" itemprop="datePublished">2019-03-19</time>

            
	<ul class="article-category-list"><li class="article-category-list-item"><a class="article-category-list-link" href="/categories/Jekyll/">Jekyll</a></li></ul>



            
<span id="busuanzi_container_page_pv" title="文章总阅读量" style="display:none">
    <i class="icon icon-eye icon-pr"></i><span id="busuanzi_value_page_pv"></span>
</span>


        </div>
        <div class="post-content" id="post-content" itemprop="postContent">
            <p>Github屏蔽百度爬虫导致在Github Pages上托管的博客、网站都无法被百度索引到，但对于国内的小伙伴尤其是还在上学的未来的程序员们百度还是一个重要的检索渠道。已经有小伙伴在这方面做了尝试并且进行了全面的分析，这里我仅仅介绍一下个人的做法。这个博客也托管在Github Pages上，个人没有虚拟主机、域名也懒得备案，主要就是通过SAE的免费主机加智能DNS解决的。</p>
<blockquote>
<p>新浪云开始征收每天10云豆（一毛钱）的最低租金，此方案不再严格完全免费。</p>
<p>但充值200元可以在新浪云代办网站备案，之后采用七牛方案也是个不错的选择。</p>
<p>文章关键点在于智能DNS的应用故保留此文章于此</p>
</blockquote>
<h2 id="可行性及原理分析"><a href="#可行性及原理分析" class="headerlink" title="可行性及原理分析"></a>可行性及原理分析</h2><p>已经有小伙伴在这方面做了尝试，<a href="http://jerryzou.com/posts/feasibility-of-allowing-baiduSpider-for-Github-Pages/?utm_source=tuicool" target="_blank" rel="noopener">文章</a>从原理到实践写的很详尽。其主要思路是，希望通过CDN的缓存拦截百度爬虫访问Github服务器，防止百度爬虫到Github服务器被暴揍。但是从CDN的角度，各个厂商还专门发展<code>搜索引擎自动回源</code>所以人家本身就不是准备干这活的。最后小伙伴也采用了个人虚拟主机的方案而且提供了Github的Webhook自动部署实践的介绍。这位叫Jerry的小伙伴棒棒嗒！</p>
<p>另外也有一部分使用七牛存储的小伙伴，尝试通过在七牛上保存网站的静态文件镜像来服务百度爬虫。主要的优势是七牛的流量和空间很足，只要充值10元就可以绑定自定义域名；但是死穴在于像我这种懒得备案的域名七牛不允许绑定。</p>
<p>小站最后采用了新浪云主机(SAE)+智能DNS(本人万网)+百度云CDN解决。思路上还是智能DNS针对来自百度解析线路的请求指向SAE服务器，SAE服务器保存Jekyll生成的静态文件当镜像。使用百度CDN的原因并不是为了加速，而是因为百度爬虫机器好像几乎不鸟万网的智能DNS，也就是说万网经常错误返回给百度默认的结果，但所幸对百度CDN的DNS同步做的很好所以加了这个中间层。</p>
<p>如果万网智能DNS很好用理想的路径如下：</p>
<img src="/2016-2-24-solve-github-baidu-spider-blocking/1.png" alt="万网智能DNS正常工作时的理想路径">
<p>添加了百度CDN以后的路径如下：</p>
<img src="/2016-2-24-solve-github-baidu-spider-blocking/2.png" alt="添加百度CDN以后的路径">
<p>有趣的是百度云CDN有两个而且两个都是真的，一个是我用的免费的<a href="http://su.baidu.com/" target="_blank" rel="noopener">百度云加速</a>另一个是百度云CDN。</p>
<h2 id="操作手册"><a href="#操作手册" class="headerlink" title="操作手册"></a>操作手册</h2><ol>
<li>注册SAE的账号并创建一个<code>PHP空应用</code>；因为PHP的应用收费最低基本每天几个云豆，点我的连接注册送1000云豆够用好久了，我们只拿它当是一个Nginx服务器用。》》》<a href="http://t.cn/RGKjo3K" target="_blank" rel="noopener">点我注册啊</a>《《《<img src="/2016-2-24-solve-github-baidu-spider-blocking/3.png" alt="在SAE创建PHP空应用"></li>
<li><p>从应用后台获取代码管理地址，我选用的是git仓库方式。用Github Pages的朋友没有不会的吧，注意因为SAE支持多版本部署所以push的时候要指定。</p>
 <figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">git remote add sae https://git.sinacloud.com/应用名</span><br></pre></td></tr></table></figure>
 <figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">git push sae master:1</span><br></pre></td></tr></table></figure>
</li>
<li><p><code>jekyll clean</code> <code>jekyll build</code> 拷贝<code>_site</code>到SAE的git然后push</p>
</li>
<li>通过SAE提供的应用域名测试你的站点 http://应用名.applinzi.com/</li>
<li>到百度云加速添加自己的网站<img src="/2016-2-24-solve-github-baidu-spider-blocking/4.png" alt="在百度云加速添加网站"></li>
<li>配置你的DNS服务并测试<img src="/2016-2-24-solve-github-baidu-spider-blocking/5.png" alt="DNS服务配置"></li>
<li>手工同步部分：因为我做了文章和Jekyll源码的分离发布文章总要执行命令所以写了脚本</li>
</ol>

        </div>

        <blockquote class="post-copyright">
    
    <div class="content">
        
<span class="post-time">
    最后更新时间：<time datetime="2019-03-19T03:49:07.004Z" itemprop="dateUpdated">2019-03-19 11:49:07</time>
</span><br>


        
        本文采用 <a href="https://creativecommons.org/licenses/by/4.0/">CC BY-SA</a> 许可发布，您可以自由的转载分享。<br>转载请保留出处...<br><a href="/2016-2-24-solve-github-baidu-spider-blocking/" target="_blank" rel="external">http://blog.beanmr.com/2016-2-24-solve-github-baidu-spider-blocking/</a>
        
    </div>
    
    <footer>
        <a href="http://blog.beanmr.com">
            <img src="/img/avatar.jpg" alt="FengDD">
            FengDD
        </a>
    </footer>
</blockquote>

        


        <div class="post-footer">
            
	<ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/github/">github</a></li><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/seo/">seo</a></li></ul>


            
<div class="page-share-wrap">
    

<div class="page-share" id="pageShare">
    <ul class="reset share-icons">
      <li>
        <a class="weibo share-sns" target="_blank" href="http://service.weibo.com/share/share.php?url=http://blog.beanmr.com/2016-2-24-solve-github-baidu-spider-blocking/&title=《解决GitHub Pages屏蔽百度爬虫的方法》 — 逗哥的代码作坊&pic=http://blog.beanmr.com/img/avatar.jpg" data-title="微博">
          <i class="icon icon-weibo"></i>
        </a>
      </li>
      <li>
        <a class="weixin share-sns wxFab" href="javascript:;" data-title="微信">
          <i class="icon icon-weixin"></i>
        </a>
      </li>
      <li>
        <a class="qq share-sns" target="_blank" href="http://connect.qq.com/widget/shareqq/index.html?url=http://blog.beanmr.com/2016-2-24-solve-github-baidu-spider-blocking/&title=《解决GitHub Pages屏蔽百度爬虫的方法》 — 逗哥的代码作坊&source=" data-title=" QQ">
          <i class="icon icon-qq"></i>
        </a>
      </li>
      <li>
        <a class="facebook share-sns" target="_blank" href="https://www.facebook.com/sharer/sharer.php?u=http://blog.beanmr.com/2016-2-24-solve-github-baidu-spider-blocking/" data-title=" Facebook">
          <i class="icon icon-facebook"></i>
        </a>
      </li>
      <li>
        <a class="twitter share-sns" target="_blank" href="https://twitter.com/intent/tweet?text=《解决GitHub Pages屏蔽百度爬虫的方法》 — 逗哥的代码作坊&url=http://blog.beanmr.com/2016-2-24-solve-github-baidu-spider-blocking/&via=http://blog.beanmr.com" data-title=" Twitter">
          <i class="icon icon-twitter"></i>
        </a>
      </li>
      <li>
        <a class="google share-sns" target="_blank" href="https://plus.google.com/share?url=http://blog.beanmr.com/2016-2-24-solve-github-baidu-spider-blocking/" data-title=" Google+">
          <i class="icon icon-google-plus"></i>
        </a>
      </li>
    </ul>
 </div>



    <a href="javascript:;" id="shareFab" class="page-share-fab waves-effect waves-circle">
        <i class="icon icon-share-alt icon-lg"></i>
    </a>
</div>



        </div>
    </div>

    
<nav class="post-nav flex-row flex-justify-between">
  
    <div class="waves-block waves-effect prev">
      <a href="/2016-2-29-a-hardware-view-for-java-cpu-3/" id="post-prev" class="post-nav-link">
        <div class="tips"><i class="icon icon-angle-left icon-lg icon-pr"></i> Prev</div>
        <h4 class="title">Java工程师要懂的硬件知识-CPU-3-Java与分支预测</h4>
      </a>
    </div>
  

  
    <div class="waves-block waves-effect next">
      <a href="/2016-2-20-a-hardware-view-for-java-cpu-1/" id="post-next" class="post-nav-link">
        <div class="tips">Next <i class="icon icon-angle-right icon-lg icon-pl"></i></div>
        <h4 class="title">Java工程师要懂的硬件知识-CPU-1-基础</h4>
      </a>
    </div>
  
</nav>



    











    <!-- Valine Comments -->
    <div class="comments vcomment" id="comments"></div>
    <script src="//cdn1.lncld.net/static/js/3.0.4/av-min.js"></script>
    <script src="//unpkg.com/valine@latest/dist/Valine.min.js"></script>
    <!-- Valine Comments script -->
    <script>
        // Guest fields the comment form is allowed to ask for.
        var GUEST_INFO = ['nick', 'mail', 'link'];
        // Configured field list, reduced to the entries that appear in GUEST_INFO.
        var guest_info = 'nick,mail,link'.split(',').filter(function (field) {
            return GUEST_INFO.indexOf(field) !== -1;
        });
        // Mount the Valine widget on #comments. notify/verify compare two different
        // string literals, so both evaluate to false. When the filtered list is empty
        // the full GUEST_INFO list is passed instead.
        new Valine({
            el: '#comments',
            notify: 'false' === 'true',
            verify: 'false' === 'true',
            appId: "li3DyMdVK1kubJOWaSi7XXX6-gzGzoHsz",
            appKey: "XTYDVacuFynCNMJYgQUrn9Ol",
            avatar: "mm",
            placeholder: "简单说两句？？",
            guest_info: guest_info.length !== 0 ? guest_info : GUEST_INFO,
            pageSize: "10"
        });
    </script>
    <!-- Valine Comments end -->










</article>



</div>

        <footer class="footer">
    <div class="top">
        
<p>
    <span id="busuanzi_container_site_uv" style="display:none">
        站点总访客数：<span id="busuanzi_value_site_uv"></span>
    </span>
    <span id="busuanzi_container_site_pv" style="display:none">
        站点总访问量：<span id="busuanzi_value_site_pv"></span>
    </span>
</p>


        <p>
            
                <span><a href="/atom.xml" target="_blank" class="rss" title="rss"><i class="icon icon-lg icon-rss"></i></a></span>
            
            <span>博客内容遵循 <a rel="license" href="https://creativecommons.org/licenses/by-nc-sa/4.0/deed.zh">知识共享 署名 - 非商业性 - 相同方式共享 4.0 国际协议</a></span>
        </p>
    </div>
    <div class="bottom">
        <p><span>FengDD &copy; 2015 - 2019</span>
            <span>
                
                Powered by <a href="http://hexo.io/" target="_blank">Hexo</a> Theme <a href="https://github.com/yscoder/hexo-theme-indigo" target="_blank">indigo</a>
            </span>
        </p>
    </div>
</footer>

    </main>
    <div class="mask" id="mask"></div>
<!-- Icon-only control: aria-label supplies the accessible name. -->
<a href="javascript:;" id="gotop" class="waves-effect waves-circle waves-light" aria-label="返回顶部"><span class="icon icon-lg icon-chevron-up" aria-hidden="true"></span></a>



<div class="global-share" id="globalShare">
    <ul class="reset share-icons">
      <li>
        <a class="weibo share-sns" target="_blank" href="http://service.weibo.com/share/share.php?url=http://blog.beanmr.com/2016-2-24-solve-github-baidu-spider-blocking/&title=《解决GitHub Pages屏蔽百度爬虫的方法》 — 逗哥的代码作坊&pic=http://blog.beanmr.com/img/avatar.jpg" data-title="微博">
          <i class="icon icon-weibo"></i>
        </a>
      </li>
      <li>
        <a class="weixin share-sns wxFab" href="javascript:;" data-title="微信">
          <i class="icon icon-weixin"></i>
        </a>
      </li>
      <li>
        <a class="qq share-sns" target="_blank" href="http://connect.qq.com/widget/shareqq/index.html?url=http://blog.beanmr.com/2016-2-24-solve-github-baidu-spider-blocking/&title=《解决GitHub Pages屏蔽百度爬虫的方法》 — 逗哥的代码作坊&source=" data-title=" QQ">
          <i class="icon icon-qq"></i>
        </a>
      </li>
      <li>
        <a class="facebook share-sns" target="_blank" href="https://www.facebook.com/sharer/sharer.php?u=http://blog.beanmr.com/2016-2-24-solve-github-baidu-spider-blocking/" data-title=" Facebook">
          <i class="icon icon-facebook"></i>
        </a>
      </li>
      <li>
        <a class="twitter share-sns" target="_blank" href="https://twitter.com/intent/tweet?text=《解决GitHub Pages屏蔽百度爬虫的方法》 — 逗哥的代码作坊&url=http://blog.beanmr.com/2016-2-24-solve-github-baidu-spider-blocking/&via=http://blog.beanmr.com" data-title=" Twitter">
          <i class="icon icon-twitter"></i>
        </a>
      </li>
      <li>
        <a class="google share-sns" target="_blank" href="https://plus.google.com/share?url=http://blog.beanmr.com/2016-2-24-solve-github-baidu-spider-blocking/" data-title=" Google+">
          <i class="icon icon-google-plus"></i>
        </a>
      </li>
    </ul>
 </div>


<div class="page-modal wx-share" id="wxShare">
    <a class="close" href="javascript:;"><i class="icon icon-close"></i></a>
    <p>扫一扫，分享到微信</p>
    <img src="" alt="微信分享二维码">
</div>




    <script src="//cdn.bootcss.com/node-waves/0.7.4/waves.min.js"></script>
<script>
// Page-level settings exposed as a global. Presumably read by the theme scripts
// loaded after this tag — TODO confirm against main.min.js.
var BLOG = {
    ROOT: '/',
    SHARE: true,
    REWARD: false
};


</script>

<script src="//unpkg.com/hexo-theme-material-indigo@latest/js/main.min.js"></script>


<div class="search-panel" id="search-panel">
    <ul class="search-result" id="search-result"></ul>
</div>
<template id="search-tpl">
<li class="item">
    <a href="{path}" class="waves-block waves-effect">
        <div class="title ellipsis" title="{title}">{title}</div>
        <div class="flex-row flex-middle">
            <div class="tags ellipsis">
                {tags}
            </div>
            <time class="flex-col time">{date}</time>
        </div>
    </a>
</li>
</template>

<script src="//unpkg.com/hexo-theme-material-indigo@latest/js/search.min.js" async></script>






<script async src="//busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script>



<script>
// Tab-title swapper: shows an "away" title while the document is hidden, a
// greeting when it becomes visible again, and puts the original title back
// two seconds after that.
(function () {
    var originalTitle = document.title;
    var restoreTimer;

    function restoreTitle() {
        document.title = originalTitle;
    }

    function onVisibilityChange() {
        if (!document.hidden) {
            document.title = '(つェ⊂)咦!回来啦!';
            restoreTimer = setTimeout(restoreTitle, 2000);
            return;
        }
        document.title = '逗哥等你回来';
        // Cancel a pending restore so it cannot overwrite the "away" title.
        clearTimeout(restoreTimer);
    }

    document.addEventListener('visibilitychange', onVisibilityChange);
})();
</script>



</body>
</html>
